# delimit ;
* Robustness check: rebuild the health knowledge index in three alternative ways and re-estimate the regressions on each version ;
clear ;
cd "replication" ;
set more off ;

* *************************************************************************** ;
* *************************************************************************** ;
* 					midline health knowledge index
* *************************************************************************** ;
* *************************************************************************** ;

* re-run original analysis to get all the variables ;
* step back up one level first because the path below starts from the parent of the replication folder ;
cd ".." ;
do "./replication/do/table03-midline-knowledge-aggregate.do" ;

* *************************************************************************** ;
* alternative ways of constructing midline knowledge index
* *************************************************************************** ;

* ********************************************
* Method 1: equally-weighted subcomponents 
* ******************************************** ;

* simple row average of the three component scores ;
egen double midline_score_v1 = rowmean(score_cleanliness score_midwife score_condom) ;

* sanity check: the row average must match the hand-written one-third weighting at three decimals ;
tempvar manual_avg ;
quietly gen double `manual_avg' = (1/3)*score_cleanliness + (1/3)*score_midwife + (1/3)*score_condom ;
assert round(midline_score_v1, 0.001) == round(`manual_avg', 0.001) ;
drop `manual_avg' ;

* ********************************************
* Method 2: Inverse Covariance Weighting 
* ******************************************** ;

* step 1: z-score every component against the control-group mean and standard deviation ;
foreach comp of varlist score_cleanliness score_midwife score_condom { ;
	summarize `comp' if control == 1 ;
	gen std_`comp'  = (`comp'-`r(mean)')/`r(sd)' ;
} ;

* step 2: variance-covariance matrix of the standardized components, and its inverse ;
correlate std_score_cleanliness std_score_midwife std_score_condom, covariance ;
matrix R = r(C) ;
matrix Rinv = inv(R) ;

* step 3: the weight of component i is the sum of row i of the inverted matrix ;
forvalues i = 1/3 { ;
	local weight`i' = Rinv[`i',1] + Rinv[`i',2] + Rinv[`i',3] ;
} ;
local totweight = `weight1' + `weight2' + `weight3' ;

* weighted average of the standardized components, with the weights normalized to sum to one ;
gen midline_score_v2 = (`weight1'*std_score_cleanliness + `weight2'*std_score_midwife + `weight3'*std_score_condom)/`totweight' ;

* cross-check the manual index against the user-written command icw_index, then discard the check variable ;
icw_index std_score_cleanliness std_score_midwife std_score_condom, gen(midline_score_v2_check) ;
assert round(midline_score_v2, 0.001) == round(midline_score_v2_check, 0.001) ;
summarize midline_score_v2 midline_score_v2_check ;
drop midline_score_v2_check ;

* ********************************************
* Method 3: Principal Components Analysis
* ******************************************** ;

* step 1: run PCA on the three raw component scores and keep the score of the first component ;
pca score_cleanliness score_midwife score_condom ;
predict first_pc, score ;

* step 2: rescale the first component with the control-group mean and SD ;
summarize first_pc if control == 1 ;
gen midline_score_v3 = (first_pc - `r(mean)')/`r(sd)' ;

* display the control-group moments of the rescaled index, then remove the scratch variable ;
summarize midline_score_v3 if control == 1 ;
drop first_pc ;

* *******************************************************************************
* regressions
* ******************************************************************************* ;

* keep the original index under a name parallel to the alternative versions ;
generate midline_score_orig = score_tot ;

* LaTeX labels for the output table ;
label variable midline_score_orig "\shortstack[l]{Original}" ;
label variable midline_score_v1 "\shortstack[l]{Equally\\weighted\\subcomponents}" ;
label variable midline_score_v2 "ICW" ;
label variable midline_score_v3 "\shortstack[l]{First\\principal\\component}" ;

foreach outcome of varlist midline_score_orig midline_score_v1 midline_score_v2 midline_score_v3 { ;

	* regress the index on the two treatment indicators, absorbing strata, with robust standard errors ;
	areg `outcome' healthonly healthandpay, absorb(strata) robust ;
	quietly estimates store `outcome' ;

	* test that the healthonly and healthandpay coefficients are equal and attach the result to the stored model ;
	test healthonly = healthandpay ;
	estadd local titlerow = "" ;
	estadd scalar fstat = r(F): `outcome' ;
	estadd scalar pval = r(p): `outcome' ;

	* control-group mean and SD of the outcome within the estimation sample ;
	summarize `outcome' if control == 1 & e(sample) == 1 ;
	estadd scalar cmean = r(mean): `outcome' ;
	estadd scalar csd = r(sd): `outcome' ;
} ;

* *************************************************************************** ;
* *************************************************************************** ;
* 					endline health knowledge index
* *************************************************************************** ;
* *************************************************************************** ;

* re-run original analysis to get all the variables ;
* NOTE(review): this second step up presumably undoes a directory change made inside the midline do-file - TODO confirm ;
cd "..";
do "./replication/do/table04-endline-knowledge-aggregate.do" ;

* *******************************************************************************
* alternative ways of constructing endline knowledge index
* ******************************************************************************* ;

* ********************************************
* Method 1: equally-weighted subcomponents 
* ******************************************** ;

* simple row average of the four module scores ;
egen double endline_score_v1 = rowmean(module1_know module23_know module4_know module5_know) ;

* sanity check: the row average must match the hand-written one-quarter weighting at three decimals ;
tempvar manual_avg ;
quietly gen double `manual_avg' = (1/4)*module1_know + (1/4)*module23_know + (1/4)*module4_know + (1/4)*module5_know ;
assert round(endline_score_v1, 0.001) == round(`manual_avg', 0.001) ;
drop `manual_avg' ;

* ********************************************
* Method 2: Inverse Covariance Weighting 
* ******************************************** ;

* step 1: standardize each of the score components using the control group mean and standard deviation ;
foreach var of varlist module1_know module23_know module4_know module5_know { ; 
	summarize `var' if control == 1 ; 
	gen std_`var'  = (`var'-`r(mean)')/`r(sd)' ; 
} ; 

* step 2: compute the variance-covariance matrix of the standardized variables ;
correlate std_module1_know std_module23_know std_module4_know std_module5_know , covariance ; 
matrix R = r(C) ;

* step 3: generate a weighted average of the standardized variables, where the weights are proportional to the sums of the rows of the inverted variance-covariance matrix ;
matrix Rinv = inv(R) ;

* clear the weights left over from the midline section. They are local macros, and macro drop on the bare names would only look for globals, so reset each local explicitly ;
local weight1 ;
local weight2 ;
local weight3 ;
local totweight ;

* row sums of the inverted matrix, one weight per module ;
local weight1 = Rinv[1,1] + Rinv[1,2] + Rinv[1,3] + Rinv[1,4] ; 
local weight2 = Rinv[2,1] + Rinv[2,2] + Rinv[2,3] + Rinv[2,4] ; 
local weight3 = Rinv[3,1] + Rinv[3,2] + Rinv[3,3] + Rinv[3,4] ; 
local weight4 = Rinv[4,1] + Rinv[4,2] + Rinv[4,3] + Rinv[4,4] ; 
local totweight = `weight1' + `weight2' + `weight3' + `weight4' ;

* dividing by the total normalizes the weights to sum to one ;
gen endline_score_v2 = (`weight1'*std_module1_know + `weight2'*std_module23_know + `weight3'*std_module4_know + `weight4'*std_module5_know)/`totweight' ; 

* check that the three steps above reproduce the user-written command icw_index at three decimals, then discard the check variable ;
icw_index std_module1_know std_module23_know std_module4_know std_module5_know , gen(endline_score_v2_check) ;
assert round(endline_score_v2, 0.001) == round(endline_score_v2_check, 0.001); 
summarize endline_score_v2 endline_score_v2_check ;
drop endline_score_v2_check ;

* ********************************************
* Method 3: Principal Components Analysis
* ******************************************** ;

* step 1: run PCA on the four module scores and extract the first principal component ;
pca module1_know module23_know module4_know module5_know ;
predict component1 ;

* step 2: standardize the first principal component using the control group mean and SD ;
summarize component1 if control == 1 ;
gen endline_score_v3 = (component1 - `r(mean)')/`r(sd)' ;
summarize endline_score_v3 if control == 1; 

* the raw component score is only scratch, so remove it as the midline section does ;
drop component1 ;

* *******************************************************************************
* endline regressions
* ******************************************************************************* ;

* original result, kept under a name parallel to the alternative versions ;
gen endline_score_orig = module_all ;

* label variables for output table. Every shortstack argument is wrapped in braces so that LaTeX stacks the whole text and not just its first letter ;
label var endline_score_orig "\shortstack[l]{Original}" ;
label var endline_score_v1 "\shortstack[l]{Equally\\weighted\\sub-\\components}" ;
label var endline_score_v2 "\shortstack[l]{ICW}" ;
label var endline_score_v3 "\shortstack[l]{First\\principal\\component}" ;

foreach var of varlist endline_score_orig endline_score_v1 endline_score_v2 endline_score_v3 { ; 
	* regress the index on the two treatment indicators, absorbing strata, with robust standard errors ;
	areg `var' healthonly healthandpay, a(strata) robust ; 
	qui est sto `var'; 
		
	* test that the healthonly and healthandpay coefficients are equal and attach the result to the stored model ;
	test healthonly = healthandpay ; 
	estadd local titlerow = "" ; 
	estadd scalar fstat = r(F): `var' ; 
	estadd scalar pval = r(p): `var' ; 

	* control-group mean and SD of the outcome within the estimation sample ;
	summarize `var' if control == 1 & e(sample) == 1;
	estadd scalar cmean = r(mean): `var' ; 
	estadd scalar csd = r(sd): `var' ;
} ;

* *******************************************************************************
* output table 
* ******************************************************************************* ;

* create empty midline variables so that they can be labelled in the output table ;
* NOTE(review): presumably needed because the endline do-file replaced the data in memory - TODO confirm ;
gen midline_score_orig = . ;
gen midline_score_v1 = . ;
gen midline_score_v2 = . ;
gen midline_score_v3 = . ;

* every shortstack argument is wrapped in braces so that LaTeX stacks the whole text and not just its first letter ;
label var midline_score_orig "\shortstack[l]{Original}" ;
label var midline_score_v1 "\shortstack[l]{Equally\\weighted\\sub-\\components}" ;
label var midline_score_v2 "\shortstack[l]{ICW}" ;
label var midline_score_v3 "\shortstack[l]{First\\principal\\component}" ;

* write the stored midline and endline models side by side into one LaTeX table ;
esttab midline_score* endline_score* using "./output/appendix-table-alternative-knowledge-index.tex",
	replace
	drop(_cons) 
	cells(b(label() star fmt(%9.3f %9.3f)) se(par)) 
	star(* 0.10 ** 0.05 *** 0.01) 
	stats(titlerow fstat pval cmean csd N, fmt(%9s %9.3f %9.3f %9.3f %9.3f %9.0f) labels("\(F\)-test, HEE = HEEC" "$\qquad$ \(F\)-statistic" "$\qquad$ \(p\)-value" "Control Mean" "Control SD" "N"))
prehead(\begin{table}[htbp] \centering \normalsize `"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
		\captionsetup{justification=centering} 
		\caption{Alternative Construction of Health Knowledge Index}
		\label{appendix-table-alternative-knowledge-index}
		\begin{tabular*}{\hsize}{p{4cm}p{1.8cm}p{1.8cm}p{1.8cm}p{1.8cm}p{1.8cm}p{1.8cm}p{1.8cm}p{1.8cm}}
		\toprule)
	legend label  booktabs  collabels( , none)
	mgroups(
	"Midline" 
	"Endline"
	, pattern(1 0 0 0 1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span}))
	postfoot(`"\bottomrule"'  \end{tabular*} \captionsetup{justification=justified, width=\hsize} 
		\caption*{\footnotesize \textit{Notes:} 
		The outcome variables represent various methods of constructing an overall health knowledge index. 
		Columns 1 and 5 are based on the proportion of correct answers to all questions (i.e., reproducing the original results from Column 1 of Tables \ref{table-midline-knowledge-aggregate} and \ref{table-endline-knowledge-aggregate}). 
		In Columns 2 and 6, each sub-component of the test is weighted equally.
		Columns 3 and 7 implement Inverse Covariance Weighting (ICW). Columns 4 and 8 use the standardized first component of principal component analysis. 
		***\$\,p < 0.01$, **\$\,p < 0.05$, *\$\,p<0.10$.
		}
		\end{table}) ;

exit ;

